### Replication script for mañaneras text analysis
library(tidyverse)
library(MetBrewer)
library(xtable)

# Load the text data used for Table 1 and Figure 1.
text_data <- readRDS("text_data.rds")

### Table 1: how often references to the INE are scored -1 on each dimension
# Each table() call tabulates a logical vector: TRUE counts rows whose score
# equals -1, FALSE counts all other non-missing rows (table() drops NA by
# default).
ine_refs <- text_data %>%
  filter(target == "INE")

table(ine_refs[["corrupt"]] == -1)
table(ine_refs[["competent"]] == -1)
table(ine_refs[["expensive"]] == -1)
table(ine_refs[["conservative"]] == -1)

### Prepare to create Figure 1
# Collapses `text_data` to one row per target x dimension holding the mean
# score, plus the mean of `n` for that target (used for point size).
# NOTE: this overwrites `text_data` with the plotting data, so the Table 1
# code above has to be run before this step.
text_data <- text_data %>%
  # Take average rhetorical scores by target (every column from `n` through
  # `expensive`, ignoring missing values)
  group_by(target) %>%
  summarize(across(n:expensive, ~ mean(.x, na.rm = TRUE))) %>%
  ungroup() %>%
  # Clean up for plotting: one row per target and dimension
  pivot_longer(
    cols = sentiment:expensive,
    names_to = "dimension",
    values_to = "score"
  ) %>%
  # Human-readable facet labels; any dimension not listed here becomes NA
  mutate(dimension = case_when(
    dimension == "conservative" ~ "Conservative → Liberal",
    dimension == "corrupt" ~ "Corrupt → Honest",
    dimension == "expensive" ~ "Expensive → Inexpensive",
    dimension == "sentiment" ~ "Bad → Good",
    dimension == "competent" ~ "Incompetent → Competent"
  )) %>%
  rename(`Number of\nmentions` = n, Target = target, Score = score) %>%
  # Re-level factors to fix the facet order, legend order and axis order
  mutate(
    dimension = factor(
      dimension,
      levels = c(
        "Bad → Good",
        "Incompetent → Competent",
        "Conservative → Liberal",
        "Corrupt → Honest",
        "Expensive → Inexpensive"
      )
    ),
    # Computed before `Target` is converted to a factor below
    `Target type` = factor(
      case_when(
        Target %in% c("PRI", "PAN", "Morena") ~ "Political party",
        TRUE ~ "Institution"
      ),
      levels = c("Institution", "Political party")
    ),
    Target = factor(
      Target,
      levels = c(
        "Morena", "PAN", "PRI", "Military", "Congress", "Courts", "INE"
      )
    )
  )

# Figure 1: mean score per target, one facet per rhetorical dimension.
# Point size encodes the number of mentions and colour the target type.
figure_1 <- ggplot(text_data, aes(x = Target, y = Score)) +
  # Dotted reference line at a score of 0
  geom_hline(yintercept = 0, linetype = 3) +
  geom_point(aes(size = `Number of\nmentions`, color = `Target type`)) +
  facet_wrap(~ dimension, scales = "free_x") +
  # Flip so targets run down the vertical axis
  coord_flip() +
  scale_color_manual(values = met.brewer("Greek", 3), name = "") +
  guides(size = guide_legend(title = "Number of\nmentions")) +
  theme(
    # PANEL & STRIPS
    # `linewidth` replaces the `size` argument of element_rect(), which is
    # deprecated since ggplot2 3.4.0 (element_line() below already uses it)
    panel.background = element_rect(
      fill = "white", color = "black", linewidth = .25
    ),
    panel.border = element_rect(fill = NA, color = "black", linewidth = .25),
    strip.background = element_rect(
      fill = "#F7F7F7", color = "black", linewidth = .25
    ),
    strip.text = element_text(
      size = 12, face = "bold", hjust = 0.5, color = "black"
    ),

    # THIN GRIDLINES
    panel.grid.major.x = element_line(
      color = "gray80", linetype = "dotted", linewidth = 0.3
    ),
    panel.grid.major.y = element_line(
      color = "gray80", linetype = "dotted", linewidth = 0.3
    ),
    panel.grid.minor = element_blank(),

    panel.spacing = unit(2, "lines"),

    # AXES & TITLES
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x  = element_text(size = 12, color = "black"),
    axis.text.y  = element_text(size = 12, color = "black", face = "bold"),

    # LEGEND
    legend.position       = "bottom",
    legend.text           = element_text(size = 11),
    legend.title          = element_text(size = 13, face = "bold"),
    legend.background     = element_blank(),
    legend.key            = element_blank(),
    legend.box.background = element_blank(),

    # PLOT TITLE
    plot.title = element_text(size = 14, hjust = .5)
  )

# Display the figure (auto-printed when run at top level)
figure_1

# Make sure the output directory exists; ggsave() fails on a missing folder
dir.create("figures", showWarnings = FALSE)

# Save the plot explicitly rather than relying on last_plot().
# NOTE(review): cairo_pdf is presumably used so the arrow characters in the
# facet labels render in the PDF -- confirm.
ggsave(
  filename = "figures/text_analysis.pdf",
  plot = figure_1,
  height = 6,
  width = 10,
  bg = "white",
  device = cairo_pdf
)


### Validation (appendix)
# Load the validation set, which holds each score column alongside a
# matching `*_val` column.
val_set <- readRDS("val_set.rds")

# Confusion matrices, one per dimension.
# Rows are the values of the `*_val` column; columns are the values of the
# un-suffixed column of the same name.
table(val_set[["sentiment_val"]], val_set[["sentiment"]])
table(val_set[["competent_val"]], val_set[["competent"]])
table(val_set[["corrupt_val"]], val_set[["corrupt"]])
table(val_set[["conservative_val"]], val_set[["conservative"]])
table(val_set[["expensive_val"]], val_set[["expensive"]])

# Calculate error rates and directions across rhetorical targets.
# For each target, take the mean of every score column and of its `*_val`
# counterpart (after rescaling), then report score minus `*_val`.

# Columns that are rescaled and averaged. Naming them explicitly keeps the
# rescaling away from any unrelated column stored between `sentiment` and
# `expensive_val`.
rating_cols <- c(
  "sentiment", "corrupt", "conservative", "competent", "expensive",
  "sentiment_val", "corrupt_val", "conservative_val", "competent_val",
  "expensive_val"
)

error_rates <- val_set %>%
  # Fix the row order of the output table
  mutate(target = factor(
    target,
    levels = c("INE", "Courts", "Congress", "Military", "Morena", "PAN", "PRI")
  )) %>%
  group_by(target) %>%
  summarize(
    # Coerce to numeric, map x to (x + 1) / 2 for ease of interpretation
    # (a -1..1 score becomes 0..1), then average ignoring missing values.
    # NOTE(review): as.numeric() on a factor returns level codes, not the
    # labels -- confirm these columns are stored as numeric or character.
    across(
      all_of(rating_cols),
      ~ mean((as.numeric(.x) + 1) / 2, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  transmute(
    target = target,
    sentiment_difference = sentiment - sentiment_val,
    competent_difference = competent - competent_val,
    corrupt_difference = corrupt - corrupt_val,
    conservative_difference = conservative - conservative_val,
    expensive_difference = expensive - expensive_val
  )

# Emit the table as LaTeX (auto-printed when run at top level)
xtable(error_rates)


